# Load appropriations data

#' Most frequent value (statistical mode) of a vector
#'
#' @param v A vector (atomic or factor).
#' @param na.omit If `TRUE` (the default), `NA`s are removed before counting.
#'   If `FALSE`, `NA` is counted like any other value and may be returned.
#' @return A length-one vector of the same type as `v`. When several values
#'   are tied for most frequent, one of them is drawn at random with
#'   `sample()`; call `set.seed()` beforehand if the result must be
#'   reproducible. When nothing is left to count (empty input, or only `NA`s
#'   with `na.omit = TRUE`) a single `NA` is returned rather than a
#'   zero-length vector, so the function can be used inside `summarise()`.
getmode <- function(v, na.omit = TRUE) {
  if (na.omit) {
    # Plain logical indexing: the argument `na.omit` shadows the name of
    # stats::na.omit(), and indexing also leaves no "na.action" attribute.
    v <- v[!is.na(v)]
  }
  if (length(v) == 0L) {
    # Indexing with NA yields one missing value of the same type as `v`.
    return(v[NA_integer_])
  }
  uv <- unique(v)
  tab <- tabulate(match(v, uv))
  out <- uv[tab == max(tab)]
  if (length(out) > 1L) {
    # Tie between equally frequent values: pick one at random.
    out <- sample(out, 1)
  }
  out
}


# Read the account crosswalk, the top-line table, the covariates and the
# appropriations panel.
crosswalk <- readxl::read_xlsx("science_crosswalk.xlsx", sheet = 1)

toplines <- readxl::read_xlsx("science_toplines.xlsx", sheet = 1)
covars <- readxl::read_xlsx("science_covariates.xlsx", sheet = 1)
appropr <- read_csv("science_appropriations.csv")
library(xtable)

# Print a LaTeX table of the account names that occur in the appropriations
# data (row names suppressed).
crosswalk %>%
  dplyr::filter(comp_acc_id %in% appropr$comp_acc_id) %>%
  dplyr::select(account) %>%
  xtable() %>%
  print(include.rownames = FALSE)


# Split `account` at the first "--" into department / program (replacing the
# original column), and `comp_acc_id` at the first "_" into the matching id
# columns while keeping `comp_acc_id` itself.
crosswalk <- crosswalk %>%
  separate(
    col = account,
    into = c("department", "program"),
    sep = "--",
    extra = "merge"
  ) %>%
  separate(
    col = comp_acc_id,
    into = c("department_id", "program_id"),
    sep = "_",
    extra = "merge",
    remove = FALSE
  )

# Keep only accounts present in the crosswalk, attach the crosswalk columns
# (natural join on all shared column names), then add the change in `pl`
# against its lag, the same for the constant series, and a `funder` key that
# copies `comp_acc_id`.
appropr <- appropr %>%
  filter(comp_acc_id %in% crosswalk$comp_acc_id) %>%
  left_join(crosswalk) %>%
  mutate(
    pl_delta = pl - pl_lag,
    pl_delta_constant = pl_constant - pl_lag_constant,
    funder = comp_acc_id
  )

# Covariates are keyed by calendar year; fiscal_year is set to cal_year + 1
# before the join.
covars <- covars %>%
  mutate(fiscal_year = as.numeric(cal_year) + 1) %>%
  dplyr::select(fiscal_year, war, unempl_rate, gdp_per_change, real_deficit)

# NOTE(review): both joins are natural joins (no `by =`); confirm the shared
# column names are the intended keys.
appropr <- appropr %>%
  left_join(covars) %>%
  left_join(toplines)


# NOTE(review): select() with no arguments keeps zero columns, so this
# expression only yields an empty-column tibble -- looks like a leftover.
crosswalk %>%
  dplyr::filter(comp_acc_id %in% appropr$comp_acc_id) %>%
  dplyr::select()

# load funding data

# Read the per-funder, per-year funding series and attach the funder list
# (natural join on the column names the two files share).
funding <- read_csv("us_funding_by_funder_by_year.csv")
funding_cats <- read_csv("USA_Gov_Funder_List.csv")
funding <- funding %>% left_join(funding_cats)


# Recode the control flags into party labels: a flag equal to 1 becomes "D",
# any other non-missing value becomes "R". This is done in one vectorised
# step instead of `funding[cond, ]$col <- "D"`, because that subset-assignment
# idiom fails when the flag contains NA (NA row indices are not allowed in
# data-frame assignment). A missing flag now yields a missing label.
# `fiscal_year` is a copy of `start_year`.
funding <- funding %>%
  mutate(
    sen_party = dplyr::if_else(majority_sen == 1, "D", "R"),
    house_party = dplyr::if_else(majority_house == 1, "D", "R"),
    pres_party = dplyr::if_else(president_party == 1, "D", "R"),
    fiscal_year = start_year
  )

# Within each funder, take the previous value (ordered by start_year) of the
# nominal and adjusted funding series and the change against it.
# NOTE(review): `funding` stays grouped by `funder` after this step, exactly
# as before; add ungroup() once it is confirmed that no later code relies on
# the grouping.
funding <- funding %>%
  group_by(funder) %>%
  mutate(
    funding_usd_lag = dplyr::lag(funding_usd, order_by = start_year),
    funding_usd_adj_lag = dplyr::lag(funding_usd_adj, order_by = start_year),
    funding_usd_delta = funding_usd - funding_usd_lag,
    funding_usd_adj_delta = funding_usd_adj - funding_usd_adj_lag
  )

# unified modifications

# Attach the top-line table to the funding data (natural join on the shared
# column names).
funding <- funding %>% left_join(toplines)

# Combine the three party labels into one `gov` string ("_"-separated, the
# unite() default), keeping the source columns. The column names live in an
# external character vector, so they are wrapped in all_of(): passing the
# bare vector to a tidyselect argument is deprecated and would be ambiguous
# if a column named `cols` ever existed.
cols <- c("pres_party", "sen_party", "house_party")
appropr <- unite(appropr, gov, all_of(cols), remove = FALSE)
funding <- unite(funding, gov, all_of(cols), remove = FALSE)

# ARRA indicator: 1 for fiscal years 2009-2011, 0 otherwise (stored as
# double, as before).
appropr$ARRA <- as.numeric(appropr$fiscal_year %in% c(2009, 2010, 2011))
funding$ARRA <- as.numeric(funding$fiscal_year %in% c(2009, 2010, 2011))

# Assign each appropriation row to a major group. Conditions are tried in
# order and the first match wins; rows matching none of them get NA.
appropr$department <- str_trim(appropr$department)
appropr <- appropr %>%
  mutate(
    major_group = case_when(
      NSF == 1 ~ "NSF",
      NIH == 1 ~ "NIH",
      CDC == 1 ~ "CDC",
      DEFENSE == 1 ~ "Defense",
      ENERGY == 1 ~ "Energy",
      department == "National Aeronautics and Space Administration" ~ "NASA",
      department == "Department of Commerce" ~ "Commerce",
      department == "Department of Agriculture" ~ "Agriculture",
      department == "Department of Transportation" ~ "Transportation"
    )
  )


# Restrict both datasets to fiscal years before 2021.
appropr <- filter(appropr, fiscal_year < 2021)
funding <- filter(funding, fiscal_year < 2021)

# Funder names treated as NIH when subsetting the funding data.
# NOTE(review): "National Center on Birth Defects and Developmental
# Disabilities" and "National Center for Emerging and Zoonotic Infectious
# Diseases" appear to be CDC centers rather than NIH units -- confirm that
# they belong in this list.
NIH <- c(
  "National Cancer Institute",
  "National Institute on Aging",
  "National Heart Lung and Blood Institute",
  "National Eye Institute",
  "National Institute of Child Health and Human Development",
  "National Institute of Diabetes and Digestive and Kidney Diseases",
  "National Institute On Alcohol Abuse and Alcoholism",
  "National Institute of Neurological Disorders and Stroke",
  "National Institute of Dental and Craniofacial Research",
  "National Institute of Environmental Health Sciences",
  "National Center for Advancing Translational Sciences",
  "National Human Genome Research Institute",
  "National Institute on Drug Abuse",
  "National Institute of General Medical Sciences",
  "National Institute on Deafness and Other Communication Disorders",
  "National Institute of Mental Health",
  "National Institute of Arthritis and Musculoskeletal and Skin Diseases",
  "National Institute of Biomedical Imaging and Bioengineering",
  "National Institute of Allergy and Infectious Diseases",
  "National Institute on Minority Health and Health Disparities",
  "National Center for Complementary and Integrative Health",
  "National Institute of Nursing Research",
  "National Center on Birth Defects and Developmental Disabilities",
  "National Institutes of Health Clinical Center",
  "National Center for Emerging and Zoonotic Infectious Diseases",
  "Fogarty International Center"
)

# Funder names treated as NSF when subsetting the funding data.
NSF <- c(
  "Office of Budget, Finance and Award Management",
  "Directorate for Education & Human Resources",
  "Office of Inspector General",
  "National Science Board",
  "National Science Foundation",
  "Directorate for Engineering",
  "Directorate for Computer & Information Science & Engineering",
  "Directorate for Biological Sciences",
  "Directorate for Mathematical & Physical Sciences",
  "Directorate for Geosciences",
  "Directorate for Social, Behavioral & Economic Sciences"
)

# Combined list, and the funding rows whose funder is in it.
acc <- c(NIH, NSF)

funding_nih_nsf <- filter(funding, funder %in% acc)

colnames(appropr)

# Drop one row that is a data entry error. The keys are compared with %in%
# rather than ==, so a missing key can never turn the condition into NA:
# filter() discards rows whose condition is NA, which with == would silently
# remove additional rows that have a missing key.
appropr <- appropr %>%
  filter(!(comp_acc_id %in% "02_10_02" &
             fiscal_year %in% 2013 &
             agency_id %in% "il04_02"))



# Columns that are added up within an account-year.
sum_vars <- c("pb", "hc", "sc", "pl",
              "pb_constant", "hc_constant", "sc_constant",
              "pl_constant", "pl_lag_constant", "pl_lag")

# Collapse to one row per account and fiscal year: the columns in `sum_vars`
# are summed (na.rm = FALSE, so any NA in a group gives NA), and every other
# non-grouping column is reduced to its most frequent value with getmode().
# The sum is wrapped in a lambda because passing extra arguments through
# across()'s `...` is deprecated since dplyr 1.1.
appropr <- appropr %>%
  group_by(comp_acc_id, fiscal_year) %>%
  summarise(
    across(all_of(sum_vars), ~ sum(.x, na.rm = FALSE)),
    across(!all_of(sum_vars), ~ getmode(.x)),
    .groups = "drop"
  )

